********************************************************************************
*************************** Do File (3): Table 2 *******************************
********************************************************************************

* Start from a clean session: no paging of output, no data in memory and
* no matrices left over from an earlier run.
set more off
clear
matrix drop _all

* Load the formatted EPH file from the folder held in the global macro $data.
use "$data/EPH_1015_format.dta", clear

* Convert the occupation code variables to numeric in place, one at a time.
foreach occvar of varlist occupation occupation_unemp occup_all {
	destring `occvar', replace
}

*** Keep only observations that have all controls used
*
* An observation is kept when the demographic controls shared by both groups
* are all observed AND it has either
*   (a) every job-related variable observed (occupation, earnings, hours, ...), or
*   (b) a non-missing occupation_unemp.
* missing() is used instead of "!= ." because "x != ." is true for the extended
* missing values (.a-.z), which would otherwise pass the filter as if observed.

keep if !missing(age, hhsize, msa, educyr, attsch_ever, native, lit, gender) ///
	& ( !missing(occupation, underemp, cont_pens, domwk_all, linc_mjob_base08, ///
	             lwagehr_mjob_base08, linc_total_base08, lhours_mjob, tenure) ///
	  | !missing(occupation_unemp) )

********************************************************************************
********************************************************************************

*** Locals for controls
*
* occup         : variable added to absorb() next to the base controls
* base_controls : variables passed to absorb() in every reghdfe call
* controls      : covariates of the pension-contribution regression

local occup "occupation"
local base_controls msa year
local controls age age2 hhsize lit native attsch_ever educyr educyr2 i.marstat i.dec_pcfaminc

********************************************************************************
********************************************************************************
********************************************************************************

** Create indicators for level of education
*
* Stata treats a missing value as larger than any number, so an unqualified
* "educlv>=2" evaluates to 1 when educlv is missing. Each indicator is therefore
* restricted to observations with educlv observed and is missing otherwise, so
* the three indicators treat missing education level the same way.

gen primary = educlv>=2 if !missing(educlv)
label var primary "Share with complete primary school"

gen secondary = educlv>=4 if !missing(educlv)
label var secondary "Share with complete secondary school"

gen tertiary = (educlv==6 | educlv==8) if !missing(educlv)
label var tertiary "Share with complete higher education"

** Create indicators for internal and foreign migrant
*
* Both indicators require migrant == 1 and differ only in the value of native.
* They equal 0 whenever the condition is not met, including when migrant or
* native is missing.

generate migrint = cond(migrant == 1 & native == 1, 1, 0)
label variable migrint "Share internal migrant"

generate migrfor = cond(migrant == 1 & native == 0, 1, 0)
label variable migrfor "Share foreign migrant"

** Create indicator for different marital status
*
* marstat codes 2, 3 and 4 are mapped, in that order, to the indicators
* married, divorced and widow. Each indicator is 0 for every other value of
* marstat, including missing.

local ms_code = 2
foreach status in married divorced widow {
	generate `status' = (marstat == `ms_code')
	label variable `status' "Share `status'"
	local ++ms_code
}

********************************************************************************
********************************************************************************
********************************************************************************

* Run regression for pension contribution to restrict the sample to use
*
* The estimates themselves are not reported; the regression is run only so
* that its estimation sample can be flagged in sample_reg (1 if used, missing
* otherwise) and reused by the balance regressions.

quietly reghdfe cont_pens domwk_all treat_dwall `controls' ///
	if ${ctrl_group} == 1 & unemployed == 0, ///
	absorb(`base_controls' `occup') vce(cluster msa)

generate sample_reg = 1 if e(sample) == 1


* First run all the regressions to get the p-values and obtain adjusted p-values
*
* For each characteristic:
*   1. standardise it with the mean and sd computed over observations with
*      sample_reg == 1, ${ctrl_group} == 1 and treat == 0;
*   2. regress the standardised variable on domwk and treat_dw;
*   3. append the two-sided p-value of treat_dw to the row vector uP_treat.

foreach var in age migrint migrfor hhsize married divorced widow lit attsch_ever primary secondary tertiary educyr {

	egen mean_`var' = mean(cond(sample_reg == 1 & ${ctrl_group} == 1 & treat == 0,`var',.))
	egen sd_`var' = sd(cond(sample_reg == 1 & ${ctrl_group} == 1 & treat == 0,`var',.))
	gen std_`var' = (`var' - mean_`var') / sd_`var'

	qui reghdfe std_`var' domwk treat_dw if empstat == 1 & sample_reg == 1 & ${ctrl_group} == 1, absorb(`base_controls' `occup') vce(cluster msa)

	* The t reference distribution uses the residual degrees of freedom that
	* reghdfe stores in e(df_r), not the number of observations e(N), so these
	* p-values match the inference reghdfe itself reports.
	* nullmat() lets the first iteration start the vector when uP_treat does
	* not exist yet, so no existence check is needed.
	matrix uP_treat = nullmat(uP_treat), 2*ttail(e(df_r), abs(_b[treat_dw]/_se[treat_dw]))

}

*** Label the variables as they appear in the table
*
* The list of variable stems and the list of labels below are parallel: the
* k-th label is attached to std_<k-th stem>.

local stems age migrint migrfor hhsize married divorced widow lit attsch_ever primary secondary tertiary educyr

local pos = 0
foreach text in "Age" "Internal migrant" "Foreign migrant" "Household size" ///
                "Married" "Divorced" "Widow" "Literate" "Attended school" ///
                "Primary school" "Secondary school" "Tertiary school" ///
                "Years of education" {
	local ++pos
	local stem : word `pos' of `stems'
	label variable std_`stem' "`text'"
}


* Generate the adjusted p-values and put them in a matrix that will be exported with the regression estimates
*
* uP_treat holds one unadjusted p-value per balance regression. It is turned
* into a column, written to the first observations of the data set as unad_p1,
* adjusted with qqvalue (Hochberg method) into hochbergP, and the adjusted
* values are read back into the column vector adjPval.

matrix uP_treat = uP_treat'

* The number of tests is taken from the matrix itself. Counting non-missing
* values of unad_p1 would come up short if any p-value were missing, and r()
* results should not be relied on across later commands.
local n_tests = rowsof(uP_treat)

svmat uP_treat, names(unad_p)

qqvalue unad_p1, method(hochberg) qvalue(hochbergP)

* svmat stores row k of the matrix in observation k, so the first n_tests
* observations hold the adjusted p-values in regression order.
mkmat hochbergP in 1/`n_tests', matrix(adjPval)

** Rerun all the regressions, including the adjusted p-values
*
* Each standardised characteristic is regressed again and the treat_dw
* coefficient is exported to Table_2 together with the q-value stored in the
* matching row of adjPval. The first export replaces the file and every later
* one appends a column to it.

local row = 1
local mode replace

foreach stem in age migrint migrfor hhsize married divorced widow lit attsch_ever primary secondary tertiary educyr {

	local depvar std_`stem'
	local title : variable label `depvar'

	quietly reghdfe `depvar' domwk treat_dw if empstat == 1 & sample_reg == 1 & ${ctrl_group} == 1, absorb(`base_controls' `occup') vce(cluster msa)

	* q-value rounded to three decimals, matching dec(3) in the table
	local q = round(adjPval[`row',1], 0.001)

	outreg2 using "$tables/Table_2", `mode' excel keep(treat_dw) nocons nor2 dec(3) label ctitle(`title') addstat(q-value, `q') ///
	addtext(Year Fixed Effects, Yes, Occupation Fixed Effects, Yes, Metropolitan Area Fixed Effects, Yes, Number of clusters, "`e(N_clust)'")

	local mode append
	local ++row
}